package org.lionsoul.websnail.sample;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.lionsoul.websnail.Spider;
import org.lionsoul.websnail.analyzer.Analyzer;
import org.lionsoul.websnail.downloader.Page;

/**
 * Sample page analyzer implementation.
 *
 * <p>For each downloaded page it forwards the page's {@code .html} links back
 * to the spider and prints the page URL and document title to standard output.
 *
 * @author RockYang
 */
public class BlogAnalyzer implements Analyzer
{

    /**
     * Handles one downloaded page.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>every link returned by {@code page.getLinks()} whose text ends with
     *       {@code ".html"} is passed to {@code spider.pushRequestUrl};</li>
     *   <li>the page HTML is parsed with jsoup;</li>
     *   <li>the page URL and then the parsed document's title are printed,
     *       one per line, to {@code System.out}.</li>
     * </ol>
     *
     * @param spider the spider that receives the follow-up request URLs
     * @param page   the page whose links, HTML and URL are read
     */
    public void process(Spider spider, Page page)
    {
        // Feed the links extracted from the current page back to the spider
        // (its to-do list, per the original author's note). Only links that
        // end in ".html" are forwarded; everything else is skipped.
        final String[] extracted = page.getLinks();
        for (int idx = 0; idx < extracted.length; idx++) {
            final String candidate = extracted[idx];
            if (!candidate.endsWith(".html")) {
                continue;
            }
            spider.pushRequestUrl(candidate);
        }

        // Parse the page markup before printing anything, so a parse failure
        // produces no partial output.
        final Document parsed = Jsoup.parse(page.getHtml());

        System.out.println(page.getUrl());
        System.out.println(parsed.title());
    }

}
